# ------------------------------------------------------------------------------
# Trains primary models in train set and predicts in test set.
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bsub -q big -R "rusage[mem=100000]" make
# ------------------------------------------------------------------------------

# Config -----------------------------------------------------------------------
# Run all lines of a recipe in a single shell invocation rather than one shell
# per line (honoured by GNU Make 3.82 and later).
.ONESHELL:
# Project settings. Variables used below but not assigned in the visible part
# of this file (MODELING_DIRS, OUTCOMES, MODELS, TUNING, ...) presumably come
# from here -- TODO confirm.
include config.mk

# Input variable default -------------------------------------------------------
# Both values are forwarded to the pipeline scripts and can be overridden on
# the command line: `make SPLIT={split} RESTRICTION={restriction}`
# Valid SPLIT values: random, overnight
SPLIT := random
# Valid RESTRICTION values:
#   all, dropcc, justcc, dem, enc, dia, lab, lvs, med, prc, represent
RESTRICTION := all

# Directory target -------------------------------------------------------------
# One `.f` file per entry of MODELING_DIRS, plus one for the local log directory.
DIRECTORIES := $(addsuffix /.f,$(MODELING_DIRS) log)

# Model targets ----------------------------------------------------------------
# For every entry of OUTCOMES: one LASSO fit, one GBM tuning result and one
# GBM fit. Tuning results live under TUNING, fitted models under MODELS.
LASSOS         := $(addprefix $(MODELS)/lasso__,$(addsuffix .rds,$(OUTCOMES)))
TUNING_RESULTS := $(addprefix $(TUNING)/gbm__,$(addsuffix .rds,$(OUTCOMES)))
GBMS           := $(addprefix $(MODELS)/gbm__,$(addsuffix .rds,$(OUTCOMES)))

# Ensembling targets -----------------------------------------------------------
# Subscores and final scores exist once per dataset (val, test); ensemble
# models exist once per entry of OUTCOMES.
SUBS      := $(addprefix $(SUBSCORES)/subscores_,$(addsuffix _set.rds,val test))
ENSEMBLES := $(addprefix $(MODELS)/ensemble__,$(addsuffix .rds,$(OUTCOMES)))
SCORES    := $(addprefix $(PREDICTION)/scores_,$(addsuffix _set.rds,val test))
# Recipes ----------------------------------------------------------------------
## all		: Constructs all modeling targets.
# Building the final scores pulls in every earlier pipeline stage through the
# prerequisite chain. First rule in this file, so presumably the default goal
# unless config.mk defines a target first -- TODO confirm.
.PHONY: all
all: scores

## prep			: Prepares features and cohort data for models.
.PHONY : prep
prep : $(COHORT_FILES)

# Each cohort file is produced by the prep wrapper script (the first
# prerequisite, hence `$<`), which receives the selected SPLIT.
# The directory stamp files are order-only prerequisites (listed after `|`):
# they must exist before the script runs, but creating or re-touching a stamp
# does not make existing cohort files look out of date, so it cannot retrigger
# the whole downstream pipeline.
# NOTE(review): this recipe runs once per missing or stale cohort file; if one
# script run writes all of them, a stamp file or grouped target would avoid
# repeated runs -- confirm against 01_prep_modeling_data.sh.
$(COHORTS)/%.rds : 01_prep_modeling_data.sh scripts/01_prep_modeling_data.R \
    | $(DIRECTORIES)
	bash $< $(SPLIT)

## lassos		: Builds LASSO models.
.PHONY: lassos
lassos: $(LASSOS)

# One LASSO model per outcome. The stem (`$*`) is the outcome name and also
# selects the matching ../model_config/<outcome>.yml. The wrapper script is
# the first prerequisite, so `$<` expands to it; it is called with the
# outcome, SPLIT and RESTRICTION.
$(MODELS)/lasso__%.rds: \
    02_fit_lasso.sh \
    scripts/02_fit_lasso.R \
    ../model_config/%.yml \
    $(COHORT_FILES)
	bash $< $* $(SPLIT) $(RESTRICTION)

## tune			: Tunes GBM models.
.PHONY: tune
tune: $(TUNING_RESULTS)

# One tuning result per outcome. The stem (`$*`) is the outcome name and also
# selects ../model_config/<outcome>.yml. `$<` is the wrapper script, called
# with the outcome, SPLIT and RESTRICTION.
$(TUNING)/gbm__%.rds: \
    03_tune_gbm.sh \
    scripts/03_tune_gbm.R \
    ../model_config/%.yml \
    $(COHORT_FILES)
	bash $< $* $(SPLIT) $(RESTRICTION)

## gbms			: Builds GBM models.
.PHONY: gbms
gbms: $(GBMS)

# One GBM per outcome. Besides the cohort data and the outcome's model config,
# the fit depends on that outcome's tuning result, so tuning runs first.
# `$<` is the wrapper script, called with the outcome (`$*`), SPLIT and
# RESTRICTION.
$(MODELS)/gbm__%.rds: \
    04_fit_gbm.sh \
    scripts/04_fit_gbm.R \
    ../model_config/%.yml \
    $(TUNING)/gbm__%.rds \
    $(COHORT_FILES)
	bash $< $* $(SPLIT) $(RESTRICTION)

## subscores	: Predicts subscores for all GBMs, LASSOs.
.PHONY: subscores
subscores: $(SUBS)

# One subscore file per dataset; here the stem (`$*`) is the dataset name
# (val or test), not an outcome. Every LASSO and GBM model is a prerequisite.
# `$<` is the wrapper script, called with the dataset, SPLIT and RESTRICTION.
$(SUBSCORES)/subscores_%_set.rds: \
    05_predict_ensemble_components.sh \
    scripts/05_predict_ensemble_components.R \
    $(LASSOS) \
    $(GBMS)
	bash $< $* $(SPLIT) $(RESTRICTION)

## ensembles	: Generates ensemble models for all outcomes.
.PHONY: ensembles
ensembles: $(ENSEMBLES)

# One ensemble per outcome. The stem (`$*`) is the outcome name and selects
# ../ensemble_config/<outcome>.yml. Only the validation-set subscores are a
# prerequisite. `$<` is the wrapper script, called with the outcome, SPLIT
# and RESTRICTION.
$(MODELS)/ensemble__%.rds: \
    06_train_ensemble.sh \
    scripts/06_train_ensemble.R \
    ../ensemble_config/%.yml \
    $(SUBSCORES)/subscores_val_set.rds
	bash $< $* $(SPLIT) $(RESTRICTION)

## scores 		: Generates predictions from ensemble models.
.PHONY: scores
scores: $(SCORES)

# One score file per dataset; the stem (`$*`) is the dataset name (val or
# test). Needs every ensemble model plus the subscores of the same dataset.
# `$<` is the wrapper script, called with the dataset, SPLIT and RESTRICTION.
$(PREDICTION)/scores_%_set.rds: \
    07_predict_ensemble.sh \
    scripts/07_predict_ensemble.R \
    $(ENSEMBLES) \
    $(SUBSCORES)/subscores_%_set.rds
	bash $< $* $(SPLIT) $(RESTRICTION)

# Config targets ---------------------------------------------------------------
## dirs 		: Builds all directories.
.PHONY: dirs
dirs: $(DIRECTORIES)

# A directory is tracked through an empty `.f` file inside it: the recipe
# creates the directory (and any missing parents), then touches the `.f` file
# so the target exists as a regular file.
%/.f:
	mkdir -p $(dir $@)
	touch $@

## style			: Styles R scripts according to tidyverse guidelines
.PHONY: style
# The conda setup, the environment activation and the R call must share one
# shell, and each step must succeed before the next one runs. Chaining them
# with `&&` on a single logical line guarantees both, whether or not
# .ONESHELL is in effect. `.` is the POSIX spelling of `source`, so the recipe
# does not rely on the recipe shell being bash.
# NOTE(review): assumes conda is installed under ~/anaconda3 and that an
# environment named `stressr` exists -- confirm for your machine.
style:
	. ~/anaconda3/etc/profile.d/conda.sh && \
	conda activate stressr && \
	R --vanilla --silent -e "styler::style_dir(path = 'scripts', filetype = c('R', 'Rmd', 'Rprofile'))"

## clean		: Deletes all models.
.PHONY : clean
# Removes everything below the MODELING directory. MODELING is not assigned in
# the visible part of this file (presumably config.mk). If it were empty or
# undefined the command would expand to `rm -rf /*`, so the first recipe line
# aborts with an error in that case; otherwise it expands to nothing.
clean :
	$(if $(strip $(MODELING)),,$(error MODELING is empty: refusing to run rm -rf on /*))
	rm -rf $(MODELING)/*

## help		: Lists the documented targets.
.PHONY : help
# Prints every line of the Makefile that starts with `##`, with that marker
# stripped. Ordinary comments must therefore start with a single `#`.
help : Makefile
	@sed -n 's/^##//p' $<
